Fit models

Author

Max Lindmark & Francesca Vitale

Published

2024-05-17

# Load libraries
library(tidyverse)
library(tidylog)
library(sdmTMB)
library(patchwork)
library(viridis)
library(RColorBrewer)
library(modelr)
library(ggeffects)
library(ggsidekick); theme_set(theme_sleek())

home <- here::here()

# Load all custom functions in R/function

for(fun in list.files(paste0(home, "/R/functions"))){
  source(paste(home, "R/functions", fun, sep = "/"))
}

Read and prepare data

Read data & scale variables. Note we have convert data from catch per hour to catch per area in this script, to make better use of the get_index function in sdmTMB. Temperature data come from GLOBAL_MULTIYEAR_PHY_001_030

d <- readr::read_csv(paste0(home, "/data/clean/trawl.csv")) %>% 
  filter(depth > 0) %>% 
  mutate(temp_sc = as.numeric(scale(temp)),
         depth = log(depth), 
         depth_sc = as.numeric(scale(depth)),
         depth_sq = depth_sc*depth_sc,
         quarter_f = as.factor(quarter),
         year_f = as.factor(year))

# Read prediction grid
pred_grid <- readr::read_csv(paste0(home, "/data/clean/pred_grid.csv")) %>% 
  filter(depth > 0) %>% 
  mutate(temp_sc = as.numeric(scale(temp)),
         depth = log(depth),
         depth_sc = (depth - mean(d$depth)) / sd(d$depth),
         depth_sq = depth_sc*depth_sc,
         quarter_f = as.factor(quarter),
         year_f = as.factor(year))

Fit models. Initial exploration suggest a delta_gamma model yields better residuals than a Tweedie or a Poisson-link delta gamma.

mesh <- make_mesh(d,
                  xy_cols = c("X", "Y"),
                  cutoff = 4)
    
ggplot() +
  inlabru::gg(mesh$mesh) +
  coord_fixed() +
  geom_point(aes(X, Y), data = d, alpha = 0.2, size = 0.5) +
  annotate("text", -Inf, Inf, label = paste("n knots = ", mesh$mesh$n), hjust = -0.1, vjust = 2) + 
  labs(x = "Easting (km)", y = "Northing (km)")

We can compare a model with a spatial random effect and one with a spatiotemporal + spatial. Spatiotemporal is IID (for speed).

# Basic model
m0 <- sdmTMB(
  formula = density ~ year_f + s(depth_sc) + quarter_f + temp_sc,
  # formula = list(density ~ year_f + s(depth_sc) + quarter_f + temp_sc,
  #                density ~ year_f + quarter_f + temp_sc),
  data = d,
  mesh = mesh,
  family = delta_gamma(),
  spatiotemporal = "off",
  spatial = "on",
  time = "year")

sanity(m0)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✖ `ln_smooth_sigma` standard error may be large
ℹ Try simplifying the model, adjusting the mesh, or adding priors
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameters don't look unreasonably large

Inital exploration suggests the spatiotemporal random field for the bionomial model is estimated very small, hence it’s omitted here and only applies to the gamma component.

# Spatiotemporal instead?
library(tictoc)
tic()
m1 <- sdmTMB(
  formula = density ~ year_f + s(depth_sc) + quarter_f + temp_sc,
  data = d,
  mesh = mesh,
  family = delta_gamma(),
  spatiotemporal = list("off", "IID"),
  spatial = "on",
  time = "year")
toc()
48.019 sec elapsed
sanity(m1)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✖ `ln_smooth_sigma` standard error may be large
ℹ Try simplifying the model, adjusting the mesh, or adding priors
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameters don't look unreasonably large

AIC favours the spatiotemporal model

AIC(m0, m1)
   df      AIC
m0 71 4879.972
m1 72 4854.057

Check model fit. The spatiotemporal model also yields better QQ plots

d$Binomial <- residuals(m1, model = 1)
d$Gamma <- residuals(m1, model = 2)

d %>% 
  pivot_longer(c(Binomial, Gamma)) %>% 
  ggplot(aes(sample = value)) +
  stat_qq(size = 0.75, shape = 21, fill = NA) +
  stat_qq_line() +
  facet_wrap(~name) + 
  labs(y = "Sample Quantiles", x = "Theoretical Quantiles") +
  theme(aspect.ratio = 1)

Conditional effects

Initial exploration also suggests that depth should be modelled on the log scale. A smooth effect works nicely for the binomial part, but it can be questioned whether depth affects the biomass density at all. However, currently in delta models in sdmTMB, if using smoothers, the effect needs to shared among model components. We could circumvent this by providing different formulas in a list, say e.g., as linear and quadratic effect. But this works ok for now. It’s just a very uncertain effect. The temperature effects are very strong and very linear!

# Depth
p1 <- visreg_delta(m1, xvar = "depth_sc", model = 1, gg = TRUE) + 
  labs(x = "Scaled depth", title = "Binomial") 

p2 <- visreg_delta(m1, xvar = "depth_sc", model = 2, gg = TRUE) +
  labs(x = "Scaled depth", title = "Gamma")

p1 / p2 + 
  plot_layout(axes = "collect")

# Temperature
p3 <- visreg_delta(m1, xvar = "temp_sc", model = 1, gg = TRUE) +
  labs(x = "Scaled temperature", title = "Binomial")

p4 <- visreg_delta(m1, xvar = "temp_sc", model = 2, gg = TRUE) +
  labs(x = "Scaled temperature", title = "Gamma")

p3 / p4 +
  plot_layout(axes = "collect")

Spatial predictions

pred <- predict(m1, newdata = pred_grid, type = "response")

# Spatial random
p1 <- plot_map +
  geom_raster(data = pred %>% filter(year == max(year)),
              aes(X*1000, Y*1000, fill = omega_s1)) +
  scale_fill_gradient2(name = "Spatial random effect") +
  theme_sleek(base_size = 9) +
  theme(legend.position.inside = c(0.25, 0.1),
        legend.direction = "horizontal",
        legend.key.width = unit(0.7, "cm"),
        legend.key.height = unit(0.3, "cm")) +
  guides(fill = guide_colorbar(position = "inside",
                               title.position = "top",
                               title.hjust = 0.5)) + 
  geom_sf() +
  labs(subtitle = "Binomial")

p2 <- plot_map +
  geom_raster(data = pred %>% filter(year == max(year)),
              aes(X*1000, Y*1000, fill = omega_s2)) +
  scale_fill_gradient2(name = "Spatial random effect") +
  theme_sleek(base_size = 9) +
  theme(legend.position.inside = c(0.25, 0.1),
        legend.direction = "horizontal",
        legend.key.width = unit(0.7, "cm"),
        legend.key.height = unit(0.3, "cm")) +
  guides(fill = guide_colorbar(position = "inside",
                               title.position = "top",
                               title.hjust = 0.5)) + 
  geom_sf() +
  labs(subtitle = "Gamma")

p1 + p2

Spatiotemporal predictions

# Total predictions
plot_map_fc +
  geom_raster(data = pred %>% filter(quarter_f == "1"),
              aes(X*1000, Y*1000, fill = est)) +
  scale_fill_viridis(name = "Density (kg/km2)", trans = "sqrt") +
  facet_wrap(~year, ncol = 5) +
  theme_sleek(base_size = 9) +
  theme(legend.position.inside = c(0.195, 0.75),
        legend.direction = "vertical",
        legend.key.width = unit(0.2, "cm"),
        legend.key.height = unit(0.3, "cm"),
        legend.title = element_text(size = 6)) +
  guides(fill = guide_colorbar(
    position = "bottom", 
    title.position = "top",
    direction = "horizontal",
    title.hjust = 1, 
    theme = theme(legend.key.width  = unit(3, "cm")))) + 
  geom_sf() + 
  labs(subtitle = "Quarter 1")

plot_map_fc +
  geom_raster(data = pred %>% filter(quarter_f == "3"),
              aes(X*1000, Y*1000, fill = est)) +
  scale_fill_viridis(name = "Density (kg/km2)", trans = "sqrt") +
  facet_wrap(~year, ncol = 5) +
  theme_sleek(base_size = 9) +
  theme(legend.position.inside = c(0.195, 0.75),
        legend.direction = "vertical",
        legend.key.width = unit(0.2, "cm"),
        legend.key.height = unit(0.3, "cm"),
        legend.title = element_text(size = 6)) +
  guides(fill = guide_colorbar(
    position = "bottom", 
    title.position = "top",
    direction = "horizontal",
    title.hjust = 1, 
    theme = theme(legend.key.width  = unit(3, "cm")))) + 
  geom_sf() + 
  labs(subtitle = "Quarter 3")

Select a few years and plot quarters next to each other

plot_map_fc +
  geom_raster(data = pred %>%
                filter(year %in% c(1993, 2001, 2020)),
              aes(X*1000, Y*1000, fill = est)) +
  scale_fill_viridis(name = "Density (kg/km2)", trans = "sqrt") +
  facet_grid(quarter~year) +
  theme_sleek(base_size = 9) +
  theme(legend.position.inside = c(0.195, 0.75),
        legend.direction = "vertical",
        legend.key.width = unit(0.2, "cm"),
        legend.key.height = unit(0.3, "cm"),
        legend.title = element_text(size = 6)) +
  guides(fill = guide_colorbar(position = "bottom", 
    title.position = "top",
    direction = "horizontal",
    title.hjust = 1, 
    theme = theme(legend.key.width  = unit(3, "cm")))) + 
  geom_sf()
filter: removed 219,759 rows (89%), 25,854 rows remaining
Warning: Removed 48 rows containing missing values or values outside the scale range
(`geom_raster()`).

Plot area occupied over time

Here we rather arbitrarily chose a threshold of 50% to mean presence. A higher value yields too many years with no predictions for Q1. We predict this from the binomial component of the model. Note that the increase is very steep in Q3, which also has warmed more.

# Here simply defined as grids with >50% probability of occurrence
n_cells <- pred %>% filter(quarter == 1) %>% nrow()

pred %>% 
  mutate(presence = ifelse(est1 > 0.5, 1, 0)) %>% 
  summarise(n = sum(presence), .by = c(year, quarter)) %>% 
  mutate(prop = (n / n_cells) * 100) %>% 
  ggplot(aes(year, prop, color = factor(quarter), fill = factor(quarter))) + 
  geom_point() +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  labs(color = "Quarter", fill = "Quarter", x = "Year",
       y = "% of cells with probability of occurence > 50%") +
  geom_smooth(method = "gam", formula = y~s(x, k = 20)) + 
  theme(legend.position = "bottom")

Get index over time

pred_q1 <- predict(m1, newdata = pred_grid %>% filter(quarter == 1),
                   return_tmb_object = TRUE)

pred_q3 <- predict(m1, newdata = pred_grid %>% filter(quarter == 3),
                   return_tmb_object = TRUE)

index_q1 <- get_index(pred_q1, area = 9, bias_correct = FALSE)
index_q3 <- get_index(pred_q3, area = 9, bias_correct = FALSE)

index <- bind_rows(index_q1 %>% mutate(quarter = as.factor(1)),
                   index_q3 %>% mutate(quarter = as.factor(3)))

# Mean in data
d %>% 
  summarise(dens = mean(density), .by = c(year, quarter)) %>% 
  ggplot(aes(year, dens, color = as.factor(quarter))) + 
  geom_line() +
  scale_color_brewer(palette = "Set1", name = "Quarter") +
  theme(legend.position = "bottom") +
  labs(subtitle = "Mean density in data",
       x = "Year", y = "Density (kg/km2)")

# Index
ggplot(index, aes(year, est/1000, color = quarter, fill = quarter)) +
  geom_ribbon(aes(ymin = lwr/1000, ymax = upr/1000), alpha = 0.3, color = NA) +
  geom_line() +
  scale_color_brewer(palette = "Set1") + 
  scale_fill_brewer(palette = "Set1") +
    labs(subtitle = "Spatiotemporal index", color = "Quarter",
         fill = "Quarter", x = "Year", y = "Biomass estimate (tonnes)")